# -*- coding: utf-8 -*-
# @Time : 2021/11/18 20:25
# @Author : Ming
# @FileName: 表格去重.py
# @Software: PyCharm

import pandas as pd

# Locations of the input and output tables, and the column whose values
# decide whether two rows count as duplicates.
SOURCE_CSV = "./data3.csv"
OUTPUT_CSV = './data.csv'
DEDUP_COLUMN = '网址'  # column header as it appears in the CSV

# Load the raw table and show a preview of its first rows.
df = pd.read_csv(SOURCE_CSV)
raw_preview = df.head()
print(raw_preview)

# Visual separator between the "before" and "after" previews.
print('----------------------------------------------------------------')

# Deduplicate on the URL column: rows sharing the same value in that column
# are collapsed, and only the first occurrence of each value is retained.
# NOTE(review): the comparison is on the whole cell value; if the column holds
# full URLs rather than bare domains, same-domain rows are not merged — confirm.
new_df = df.drop_duplicates(subset=DEDUP_COLUMN, keep='first')

# Persist the deduplicated table without the pandas row index, then preview it.
new_df.to_csv(OUTPUT_CSV, index=False)
clean_preview = new_df.head()
print(clean_preview)
